#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#############################################################################
# Copyright (c) 2020 Huawei Technologies Co.,Ltd.
#
# openGauss is licensed under Mulan PSL v2.
# You can use this software according to the terms
# and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
#          http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
# ----------------------------------------------------------------------------
# Description  : gs_collector is a utility
# to collect information about the cluster.
#############################################################################

import os
import sys
import pwd
import time
import json
from datetime import datetime

from gspylib.common.GaussLog import GaussLog
from gspylib.common.Common import DefaultValue
from gspylib.common.ParallelBaseOM import ParallelBaseOM
from gspylib.common.ErrorCode import ErrorCode
from gspylib.common.ParameterParsecheck import Parameter
from impl.collect.OLAP.CollectImplOLAP import CollectImplOLAP
from domain_utils.cluster_file.cluster_log import ClusterLog
from base_utils.os.env_util import EnvUtil
from domain_utils.domain_common.cluster_constants import ClusterConstants
from domain_utils.cluster_os.cluster_user import ClusterUser


def my_obj_pairs_hook(lst):
    """
    function: object_pairs_hook for the json decoder; builds a dict from the
              decoded pairs of one JSON object and refuses duplicate keys
    input : lst, the list of (key, value) pairs of one JSON object
    output: dict holding every key with its value, in the order decoded;
            a repeated key is reported with GAUSS_51245 through
            GaussLog.exitWithError
    """
    result = {}
    for key, val in lst:
        if key in result:
            # The same key appeared twice inside one JSON object.
            GaussLog.exitWithError(
                ErrorCode.GAUSS_512["GAUSS_51245"] % key)
        else:
            result[key] = val
    return result


class Collect(ParallelBaseOM):
    """
    define option
    """

    def __init__(self):
        """
        function: run the ParallelBaseOM initialisation and give every
                  collector option its default value
        input : NA
        output: NA
        """
        ParallelBaseOM.__init__(self)

        # Sizing constants.
        # Our products may generate 200MB/(1DN per day), so the max log size
        # is (8DN * (1master+7standbys) + 1CN) * 200MB = 13GB/node.
        # Other logs (OM/CM/Audit) are too small to be counted here.
        self.LOG_SIZE_PER_DAY_ONE_NODE = 13 * 1024
        # Measured speed for packaging logs into a compressed tar file: 45MB/s.
        self.TAR_SPEED = 45

        # Hosts and files.
        self.host = ""
        self.nodeName = []
        self.inFile = ""
        self.outFile = ""
        self.appPath = ""

        # Collect configuration: the config file path and its parsed content.
        self.configFile = ""
        self.config = {}

        # Log selection.
        self.begintime = ""
        self.endtime = ""
        self.keyword = ""
        # endtime - begintime, in days, rounded up.
        self.duration = 0

        # Speed limit to copy/scp files, in MB/s.
        self.speedLimit = 1024
        self.speedLimitFlag = 0

        # Max time for collecting xlog when dss is enabled.
        self.timeout = 0
        self.enable_dss = False

    #############################################################################
    # Parse and check parameters
    #############################################################################
    def usage(self):
        """
gs_collector is a utility to collect information about the cluster.

Usage:
  gs_collector -? | --help
  gs_collector -V | --version
  gs_collector --begin-time="BEGINTIME" --end-time="ENDTIME" [-h HOSTNAME | -f HOSTFILE] 
                [--keyword=KEYWORD] [--speed-limit=SPEED] [-o OUTPUT] [-l LOGFILE] [-C CONFIGFILE] [--timeout=TIMEOUT]

General options:
      --begin-time=BEGINTIME      Time to start log file collection. Pattern:yyyymmdd hh:mm.
      --end-time=ENDTIME          Time to end log file collection. Pattern:yyyymmdd hh:mm.
      --speed-limit=SPEED         Bandwidth to copy files, a nonnegative integer, in MByte/s.
                                  0 means unlimited. Only supported if rsync command exists.
      --timeout=TIMEOUT           Max time to collect log or xlog when enable dss, in seconds.
                                  0 means using default value.
  -h                              Names of hosts whose information is to be collected.
                                  Example: host1,host2.
  -f                              File listing names of all the hosts to connect to.
      --keyword=KEYWORD           Save log files containing the keyword.
  -o                              Save the result to the specified folder.
  -l                              Path of log file.
  -?, --help                      Show help information for this utility, and exit the command line mode.
  -V, --version                   Show version information.
  -C                              gs_collector config file, listing which info to collect
  # gs_collector.json example
  {
    "Collect":
    [
        {"TypeName": "name", "Content": "value", "Interval": "seconds", "Count": "counts", "FileNumber": "number"} # interval is in Second
    ]
  }

  # TypeName : content
  COLLECT_INFO_MAP
  {
        "System" : "HardWareInfo,RunTimeInfo",
        "Database" : "pg_locks,pg_stat_activity,pg_thread_wait_status",
        "Log" : "DataNode,ClusterManager",
        "XLog": "DataNode",
        "Config" : "DataNode",
        "Gstack" : "DataNode",
        "DssConfig": "DataNode",
        "DssDiskInfo", "vgname, pri_vgname",
        "CoreDump": "gaussdb,GaussMaster,gs_ctl"
        "Trace": "Dump"
        "Plan": "*"    # Any database name or character "*"
   }

        """
        print(self.usage.__doc__)

    def dateCheck(self, datestr):
        """
        function: check the type of date wether is is correct or not
        input : timedate
        output: bool
        """
        # Check the time format
        try:
            time.strptime(datestr, "%Y%m%d %H:%M")
            if (len(datestr.split(" ")[0]) != 8
                    or len(datestr.split(" ")[1]) != 5):
                return False
            return True
        except Exception:
            return False

    def checkIsEnableDss(self):
        """
        function: set enable_dss to True when EnvUtil.getEnv("DSS_HOME")
                  returns a non-empty value; otherwise leave it untouched
        input : NA
        output: NA
        """
        if EnvUtil.getEnv("DSS_HOME"):
            self.enable_dss = True

    def parseCommandLine(self):
        """
        function: parse the command line and save the options on this object
        input : NA, the options come from
                Parameter().ParameterCommandLine("collector")
        output: NA; prints the usage and exits with status 0 when the help
                flag is present, and reports a malformed value through
                GaussLog.exitWithError
        """
        ParaDict = Parameter().ParameterCommandLine("collector")

        # If help is included in the parameter,
        # the help message is printed and the program exits.
        if "helpFlag" in ParaDict:
            self.usage()
            sys.exit(0)

        if "nodename" in ParaDict:
            self.nodeName = ParaDict.get("nodename")
        if "hostfile" in ParaDict:
            self.inFile = ParaDict.get("hostfile")

        if "begintime" in ParaDict:
            self.begintime = ParaDict.get("begintime")
            # Check the begin time parameter format is correct
            if not self.dateCheck(self.begintime):
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
                                       ('-begin-time',
                                        "date") + " Pattern: yyyymmdd hh:mm.")

        if "endtime" in ParaDict:
            self.endtime = ParaDict.get("endtime")
            # Check the end time parameter format is correct
            if not self.dateCheck(self.endtime):
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
                                       ('-end-time',
                                        "date") + " Pattern: yyyymmdd hh:mm.")

        if "keyword" in ParaDict:
            self.keyword = ParaDict.get("keyword")
        if "outFile" in ParaDict:
            self.outFile = ParaDict.get("outFile")
        if "logFile" in ParaDict:
            self.logFile = ParaDict.get("logFile")

        # Speed limit to copy/remote copy files.
        if "speedLimit" in ParaDict:
            self.speedLimit = self._parseNonNegativeInt(
                ParaDict.get("speedLimit"), '-speed-limit')
            # Remember that the limit was given explicitly.
            self.speedLimitFlag = 1

        if "configFile" in ParaDict:
            self.configFile = ParaDict.get("configFile")

        # Max time to collect log/xlog.
        if "timeout" in ParaDict:
            self.timeout = self._parseNonNegativeInt(
                ParaDict.get("timeout"), '-timeout')

    @staticmethod
    def _parseNonNegativeInt(value, optionName):
        """
        function: convert an option value to a nonnegative integer
        input : value, the raw option value
                optionName, the option name used in the error message
        output: int; a value that is not made of decimal digits only is
                reported with GAUSS_50003 through GaussLog.exitWithError
        """
        text = str(value)
        # isdecimal() accepts exactly the digit characters int() can convert;
        # isdigit() would also let characters such as a superscript two pass
        # and int() would then fail with a ValueError.
        if not text.isdecimal():
            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50003"] %
                                   (optionName, 'a nonnegative integer'))
        return int(text)

    def checkParameter(self):
        """
        function: check the parsed parameters and complete the settings that
                  depend on them (host list, collect config, time range,
                  user, log file, speed limit, timeout)
        input : NA
        output: NA; every violation is reported through
                GaussLog.exitWithError
        """
        # The -h and -f parameters can not be specified at the same time
        if len(self.nodeName) != 0 and self.inFile != "":
            GaussLog.exitWithError(
                ErrorCode.GAUSS_500["GAUSS_50005"] % ('h', 'f'))

        if self.inFile != "":
            self._loadHostFile()

        # Without -C, use the config file located in this script's directory.
        if self.configFile == "":
            self.configFile = "%s/%s" % (
                os.path.dirname(os.path.realpath(__file__)),
                ClusterConstants.GS_COLLECTOR_CONFIG_FILE)
        self._loadCollectConfig()

        self._checkTimeRange()

        # check mpprc file path
        self.mpprcFile = EnvUtil.getMpprcFile()
        # check if user exist and is the right user
        try:
            self.user = pwd.getpwuid(os.getuid()).pw_name
            ClusterUser.checkUser(self.user)
        except Exception as e:
            GaussLog.exitWithError(str(e))

        # check log file
        if self.logFile == "":
            self.logFile = ClusterLog.getOMLogPath(
                ClusterConstants.GS_COLLECTOR_LOG_FILE, self.user, "")

        # A speed limit of 0 is replaced by 1024.
        if self.speedLimit == 0:
            self.speedLimit = 1024

        if self.timeout < 0:
            self.timeout = 0

    def _loadHostFile(self):
        """
        function: append the distinct, non-empty lines of the host file
                  (self.inFile) to self.nodeName
        input : NA
        output: NA; a missing file or an empty resulting host list is
                reported through GaussLog.exitWithError
        """
        # Check if the hostname file exists
        if not os.path.exists(self.inFile):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_502["GAUSS_50201"] % self.inFile)
        with open(self.inFile, "r") as fp:
            for line in fp:
                node = line.strip()
                if node and node not in self.nodeName:
                    self.nodeName.append(node)
        # An error exit if the node name is not available
        if len(self.nodeName) == 0:
            GaussLog.exitWithError(
                ErrorCode.GAUSS_502["GAUSS_50203"] % self.inFile)

    def _loadCollectConfig(self):
        """
        function: read the JSON config file (self.configFile), validate it
                  and fill self.config
        input : NA
        output: NA; a missing file, any exception raised while reading or
                validating it, and an empty resulting configuration are
                reported through GaussLog.exitWithError
        """
        # Check if the config file exists
        if not os.path.exists(self.configFile):
            GaussLog.exitWithError(
                ErrorCode.GAUSS_502["GAUSS_50201"] % self.configFile)
        try:
            with open(self.configFile, "r") as fp:
                config_json = json.load(
                    fp, object_pairs_hook=my_obj_pairs_hook)

            for key, value in config_json.items():
                # "Collect" is the only top-level key allowed.
                if str(key) != "Collect":
                    GaussLog.exitWithError(
                        ErrorCode.GAUSS_512["GAUSS_51242"] % (
                            self.configFile, str(key)))
                for it in value:
                    self._checkCollectItem(it)
        except Exception:
            # Malformed JSON, a missing "TypeName", an unknown type name or
            # a non-string value all end up here.
            GaussLog.exitWithError(
                ErrorCode.GAUSS_512["GAUSS_51239"] % self.configFile)

        if len(self.config) == 0:
            GaussLog.exitWithError(ErrorCode.GAUSS_535["GAUSS_53516"])

    def _checkCollectItem(self, it):
        """
        function: validate one entry of the "Collect" list and store it in
                  self.config under its TypeName
        input : it, dict decoded from one entry of the "Collect" list
        output: NA; for every type except "Plan" and "Database" the entry's
                "Content" is rewritten with the values found in
                DefaultValue.COLLECT_CONF_CONTENT_MAP, each followed by ","
        """
        d_c = ""
        u_c = ""
        if it["TypeName"] == "XLog" and self.enable_dss:
            if "FileNumber" not in it:
                GaussLog.exitWithError(
                    "ERROR: FileNumber must be in gs_collector config file when dss enabled.")
            # Compare numerically: comparing the text with "0" rejected
            # values such as " 5" and accepted "00".
            if self._isNonPositiveNumber(it["FileNumber"]):
                GaussLog.exitWithError("ERROR: FileNumber must be more than 0.")

        for k, v in it.items():
            if k not in DefaultValue.COLLECT_CONF_JSON_KEY_LIST:
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_512["GAUSS_51242"]
                    % (self.configFile, str(k)))
            if k == "TypeName":
                d_c = DefaultValue.COLLECT_CONF_MAP[v]
            elif k == "Content":
                u_c = v
            elif k in ("Interval", "Count", "FileNumber"):
                # Spaces are tolerated; everything else must be a decimal
                # digit so that the value converts with int().
                if not v.replace(" ", "").isdecimal():
                    GaussLog.exitWithError(
                        ErrorCode.GAUSS_512["GAUSS_51241"]
                        % (k, v))

        if len(u_c) > 0 and len(d_c) > 0:
            T_Name = it["TypeName"]
            it["Content"] = ""
            if T_Name in ("Plan", "Database"):
                # The content of these two types is kept as written.
                it["Content"] = u_c
            else:
                for c in u_c.replace(" ", "").split(","):
                    # NOTE(review): this is a substring test if the values of
                    # COLLECT_CONF_MAP are strings - confirm against
                    # DefaultValue.
                    if c not in d_c:
                        GaussLog.exitWithError(
                            ErrorCode.GAUSS_512["GAUSS_51243"]
                            % (c, it['TypeName'],
                               self.configFile))
                    elif c in DefaultValue.COLLECT_CONF_CONTENT_MAP:
                        it["Content"] += \
                            DefaultValue.COLLECT_CONF_CONTENT_MAP[c] + ","
                    else:
                        GaussLog.exitWithError(
                            ErrorCode.GAUSS_512["GAUSS_51244"]
                            % c)
            self.config.setdefault(T_Name, []).append(it)
        else:
            # "TypeName" and "Content" must both be present and non-empty.
            GaussLog.exitWithError(
                ErrorCode.GAUSS_512["GAUSS_51240"])

    @staticmethod
    def _isNonPositiveNumber(text):
        """
        function: tell whether a config value is an integer that is not
                  greater than 0
        input : text, the value as written in the config file (a string)
        output: bool; False when the text is not an integer, so that the
                caller's later format check reports it
        """
        try:
            return int(text.replace(" ", "")) <= 0
        except ValueError:
            return False

    def _checkTimeRange(self):
        """
        function: require --begin-time and --end-time, reduce both to
                  "yyyymmddhhmm" and compute self.duration
        input : NA
        output: NA; a missing value or an end time earlier than the begin
                time is reported through GaussLog.exitWithError
        """
        # An error exit if the begin time parameter is not entered
        if not self.begintime:
            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
                                   % '-begin-time' + " for [gs_collector].")
        else:
            # Extract the time in --begin-time according to the format
            self.begintime = self.begintime.replace(" ", "").replace(":", "")

        # An error exit if the end time parameter is not entered
        if not self.endtime:
            GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50001"]
                                   % '-end-time' + " for [gs_collector].")
        else:
            # Extract the time in --end-time according to the format
            self.endtime = self.endtime.replace(" ", "").replace(":", "")

        if self.endtime and self.begintime:
            # The start time must not be later than the end time.
            # Both values are fixed-width digit strings here, so comparing
            # the strings orders them chronologically.
            if self.endtime < self.begintime:
                GaussLog.exitWithError(ErrorCode.GAUSS_500["GAUSS_50004"]
                                       % "-end-time or --begin-time" +
                                       "The value of '--end-time' must"
                                       " be greater than the value "
                                       "of '--begin-time'.")
            datebegin = datetime.strptime(self.begintime, "%Y%m%d%H%M")
            dateend = datetime.strptime(self.endtime, "%Y%m%d%H%M")
            # Whole days between the two times, plus one.
            self.duration = (dateend - datebegin).days + 1


if __name__ == '__main__':
    # function: main
    # input : NA
    # output: NA

    # gs_collector must not be run by the root user.
    if os.getuid() == 0:
        GaussLog.exitWithError(ErrorCode.GAUSS_501["GAUSS_50105"])
    try:
        collectObj = Collect()

        # Detect dss before the parameters are checked: the config check
        # reads enable_dss.
        collectObj.checkIsEnableDss()

        # Parse the command line into the object, then validate it.
        collectObj.parseCommandLine()
        collectObj.checkParameter()

        # Hand the validated options to the OLAP implementation and run it.
        impl = CollectImplOLAP(collectObj)
        impl.run()
    except Exception as err:
        GaussLog.exitWithError(str(err))
    sys.exit(0)
